Here we will simulate two symptoms which give rise to a disease. The symptoms are not heritable. However, there are genetic effects on the D matrix in the LDL decomposition of \(\Sigma\). We control the heritability of D. We also make a couple more simplifying design choices: \(L\) is lower triangular and contains ones only so the covariance of the symptoms depends only on \(D_{1,1}\).
N <- 1000
l <- 100
reps <- 5
p <- 2
hercovs <- seq(0,1,0.2)
her <- 0
L <- matrix(0,nrow=p,ncol=p)
L[lower.tri(L,diag=T)] <- 1
diag(L) <- 1
rows <- data.frame()
for (r in c(1:reps)){
print(r)
for (hercov in hercovs){
print(hercov)
G <- simulate_genotypes(N = N,L = l)
G <- scale(G)
beta_cov <- matrix(mvrnorm(n=l,mu = rep(0,p),Sigma = diag(p)),nrow=l) %*% diag(3,p,p) * sqrt(hercov / l)
e_cov <- matrix(mvrnorm(n=N,mu = rep(0,p),Sigma = diag(p)),nrow=N) %*% diag(3,p,p) * sqrt(1-hercov)
D_vectors <- G %*% beta_cov + e_cov + matrix(10,nrow=N,ncol=p)
D_vectors[D_vectors<0] <- 0.1
beta <- matrix(mvrnorm(n=l,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=l) %*% diag(1,p,p) * sqrt(her / l)
e <- matrix(mvrnorm(n=N,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=N) * sqrt(1-her)
X <- matrix(0,nrow=N,ncol=p)
# Give each person a covariance matrix and draw symptoms
for (ind in c(1:N)){
# make into matrix
D_vectors[ind,2] <- 1 # setting this to one to control the correlation
Sig_ind <- L %*% diag(D_vectors[ind,]) %*% t(L)
beta_transformed <- t(chol(Sig_ind)) %*% t(beta) %>% t() # check this
e_transformed <- t(chol(Sig_ind)) %*% e[ind,] %>% t() # check this
X[ind,] <- G[ind,] %*% beta_transformed + e_transformed
}
# setting the threshold to keep constant prevalence
c <- 1
n <- 1
Y <- apply(X,1,mdd_risk,threshold=c,n=n)
prev <- sum(Y)/N
# calculating heritabilities
h2D <- greml(D_vectors[,1],G)$h2
h2IP <- greml(X[,1] * X[,2],G)$h2
h2Y <- greml(Y,G)$h2
rows <- rbind(rows,data.frame("h2_symptoms"=her,
"h2_Y"=h2Y,
"h2_IP"=h2IP,
"h2_D"=h2D,
"prev"=sum(Y),
"rep"=r,
"P"=p,
"c"=c,
"n"=n,
"hercov"=hercov))
}
}
## [1] 1
## [1] 0
## [1] 0.2
## [1] 0.4
## [1] 0.6
## [1] 0.8
## [1] 1
## [1] 2
## [1] 0
## [1] 0.2
## [1] 0.4
## [1] 0.6
## [1] 0.8
## [1] 1
## [1] 3
## [1] 0
## [1] 0.2
## [1] 0.4
## [1] 0.6
## [1] 0.8
## [1] 1
## [1] 4
## [1] 0
## [1] 0.2
## [1] 0.4
## [1] 0.6
## [1] 0.8
## [1] 1
## [1] 5
## [1] 0
## [1] 0.2
## [1] 0.4
## [1] 0.6
## [1] 0.8
## [1] 1
rows %>%
pivot_longer(cols=c("h2_D","h2_Y","h2_IP"),names_to = "feature",values_to = "GREML heritability") %>%
ggplot(aes(x=hercov,col=feature,y=`GREML heritability`))+
geom_point() +
geom_smooth(method="lm",se=F,linetype=2,linewidth=0.5,alpha=0.5)
## `geom_smooth()` using formula = 'y ~ x'
labs(x="covariance heritability (heritability of D)")
## $x
## [1] "covariance heritability (heritability of D)"
##
## attr(,"class")
## [1] "labels"
We can see that the entries of D are heritable, but the disease and individual-level product are not very heritable. Why?
Let’s do a rerun of the last simulation results where the covariance heritability was 1, but add more individuals and loci.
N <- 2000
l <- 100
hercov <- 1
G <- simulate_genotypes(N = N,L = l)
G <- scale(G)
beta_cov <- matrix(mvrnorm(n=l,mu = rep(0,p),Sigma = diag(p)),nrow=l) %*% diag(3,p,p) * sqrt(hercov / l)
e_cov <- matrix(mvrnorm(n=N,mu = rep(0,p),Sigma = diag(p)),nrow=N) %*% diag(3,p,p) * sqrt(1-hercov)
D_vectors <- G %*% beta_cov + e_cov + matrix(10,nrow=N,ncol=p)
D_vectors[D_vectors<0] <- 0.1
beta <- matrix(mvrnorm(n=l,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=l) %*% diag(1,p,p) * sqrt(her / l)
e <- matrix(mvrnorm(n=N,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=N) * sqrt(1-her)
X <- matrix(0,nrow=N,ncol=p)
# Give each person a covariance matrix and draw symptoms
for (ind in c(1:N)){
# make into matrix
D_vectors[ind,2] <- 1 # setting this to one to control the correlation
Sig_ind <- L %*% diag(D_vectors[ind,]) %*% t(L)
beta_transformed <- t(chol(Sig_ind)) %*% t(beta) %>% t() # check this
e_transformed <- t(chol(Sig_ind)) %*% e[ind,] %>% t() # check this
X[ind,] <- G[ind,] %*% beta_transformed + e_transformed
}
# setting the threshold to keep constant prevalence
c <- 5
n <- 1
Y <- apply(X,1,mdd_risk,threshold=c,n=n)
We’re going to calculate and normalise the individual-level product as they do in CLIP, as \(IP=\frac{(x_1-\bar{x_1})(x_2-\bar{x_2})}{\sqrt{{x_1}{x_2}}}\).
phenos <- data.frame(X,Y,D=D_vectors[,1]) %>%
mutate(IP12=((X1-mean(X1))* (X2-mean(X2)))/sqrt(var(X1)*var(X2))) # normalised IP
Let’s visualise how the symptoms, and the IP, relates to the disease. s
ggplot(phenos) + geom_point(aes(x=X1,y=X2,col=IP12),shape=16) + scale_color_viridis_c()
ggplot(phenos) + geom_point(aes(x=X1,y=X2,col=Y),shape=16)
ggplot(phenos) + geom_point(aes(x=D,y=IP12,col=Y))
And now we’ll run GWAS on: * the entries of D: this should be easy, and we should get the covariance QTLs * the IP: might be trickier, but there should be some signal * the disease: we hope to have some signal from the covariance QTLs… * the symptoms: these are not directly heritable, so hopefully just noise
apply(G,2,function(x) lm(phenos$IP12 ~ x)$coefficients[2]) -> GWAS_IP
apply(G,2,function(x) lm(phenos$D ~ x)$coefficients[2]) -> GWAS_D
apply(G,2,function(x) lm(phenos$X1 ~ x)$coefficients[2]) -> GWAS_X1
apply(G,2,function(x) lm(phenos$X2 ~ x)$coefficients[2]) -> GWAS_X2
apply(G,2,function(x) lm(phenos$Y ~ x)$coefficients[2]) -> GWAS_Y
data.frame(beta=beta_cov[,1],
beta_hat_D=GWAS_D,
beta_hat_IP=GWAS_IP,
beta_hat_Y=GWAS_Y,
beta_hat_X1=GWAS_X1,
beta_hat_X2=GWAS_X2
) -> GWAS
How do the estimated effect sizes compare to the real ones and to each other?
GWAS %>% ggplot(aes(x=beta,y=beta_hat_D)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWAS %>% ggplot(aes(x=beta,y=beta_hat_IP)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWAS %>% ggplot(aes(x=beta,y=beta_hat_X1)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWAS %>% ggplot(aes(x=beta,y=beta_hat_X2)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWAS %>% ggplot(aes(x=beta,y=beta_hat_Y)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
So GWAS is good at finding D entries, but not so good at finding individual-level products.
GWAS %>% cor() %>% ggcorrplot(method="circle")
This correlation plot shows how the effect size estimates are correlated across the different phenotypes available. If we look at the beta row, we see that there is signal in the estimated effect sizes on D and to some extent on the IP and Y, but not on the symptoms. This is what we want!
If we look at the beta_hat_Y row, we see that the signal is shared across the symptoms (makes sense, since the disease is a function of the symptoms…?) and a little across the covariance QTLs.
Now I will model genetic effects on the bottom left entry of L, which controls the covariance: \(cov(X_1,X_2)=L_{2,1}D_1\) and the variance of \(X_2\): \(Var(X_2)=L_1^2D_1 + D_2\). The entries of D will be constant, and I will keep them at 1.
D <- diag(1,2,2)
L <- matrix(0,nrow=p,ncol=p)
L[lower.tri(L,diag=T)] <- 1
diag(L) <- 1
beta_cov <- matrix(mvrnorm(n=l,mu = rep(0,1),Sigma = diag(1)),nrow=l) %*% diag(2,1,1) * sqrt(hercov / l)
e_cov <- matrix(mvrnorm(n=N,mu = rep(0,1),Sigma = diag(1)),nrow=N) %*% diag(2,1,1) * sqrt(1-hercov)
L_vectors <- G %*% beta_cov + e_cov
beta <- matrix(mvrnorm(n=l,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=l) %*% diag(1,p,p) * sqrt(her / l)
e <- matrix(mvrnorm(n=N,mu = rep(0,p),Sigma = diag(5,p,p)),nrow=N) * sqrt(1-her)
XL <- matrix(0,nrow=N,ncol=p)
# Give each person a covariance matrix and draw symptoms
for (ind in c(1:N)){
# make into matrix
L_ind <- L
L_ind[2,1] <- L_vectors[ind]
Sig_ind <- L_ind %*% D %*% t(L_ind)
beta_transformed <- t(chol(Sig_ind)) %*% t(beta) %>% t() # check this
e_transformed <- t(chol(Sig_ind)) %*% e[ind,] %>% t() # check this
XL[ind,] <- G[ind,] %*% beta_transformed + e_transformed
}
# setting the threshold to keep constant prevalence
c <- 1
n <- 1
YL <- apply(XL,1,mdd_risk,threshold=c,n=n)
prev <- sum(YL)/N
# calculating heritabilities
h2L <- greml(L_vectors,G)$h2
h2IP <- greml(XL[,1] * XL[,2],G)$h2
h2Y <- greml(YL,G)$h2
print(h2L)
## [1] 1.404324
print(h2IP)
## [1] 1.275842
print(h2Y)
## [1] 1.829318
Note how the heritability of the individual-level product, and of the disease are much higher now.
phenosL <- data.frame(XL,Y=YL,L=L_vectors) %>%
mutate(IP12=((X1-mean(X1))* (X2-mean(X2)))/sqrt(var(X1)*var(X2))) # normalised IP
Plotting various things:
ggplot(phenosL) + geom_point(aes(x=X1,y=X2,col=IP12),shape=16) + scale_color_viridis_c()
ggplot(phenosL) + geom_point(aes(x=X1,y=X2,col=L),shape=16) + scale_color_viridis_c()
ggplot(phenosL) + geom_point(aes(x=X1,y=X2,col=Y),shape=16)
ggplot(phenosL) + geom_point(aes(x=L,y=IP12,col=Y))
What an odd pattern of symptoms.
apply(G,2,function(x) lm(phenosL$IP12 ~ x)$coefficients[2]) -> GWAS_IP
apply(G,2,function(x) lm(phenosL$L ~ x)$coefficients[2]) -> GWAS_L
apply(G,2,function(x) lm(phenosL$X1 ~ x)$coefficients[2]) -> GWAS_X1
apply(G,2,function(x) lm(phenosL$X2 ~ x)$coefficients[2]) -> GWAS_X2
apply(G,2,function(x) lm(phenosL$Y ~ x)$coefficients[2]) -> GWAS_Y
data.frame(beta=beta_cov[,1],
beta_hat_L=GWAS_L,
beta_hat_IP=GWAS_IP,
beta_hat_Y=GWAS_Y,
beta_hat_X1=GWAS_X1,
beta_hat_X2=GWAS_X2
) -> GWASL
How does GWAS do?
GWASL %>% ggplot(aes(x=beta,y=beta_hat_L)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWASL %>% ggplot(aes(x=beta,y=beta_hat_IP)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWASL %>% ggplot(aes(x=beta,y=beta_hat_X1)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWASL %>% ggplot(aes(x=beta,y=beta_hat_X2)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWASL %>% ggplot(aes(x=beta,y=beta_hat_Y)) + geom_point() +geom_smooth(method="lm")
## `geom_smooth()` using formula = 'y ~ x'
GWASL %>% cor() %>% ggcorrplot(method="circle")
Better than when the entries are on D!